Setup

library(ggplot2)
library(tidyverse)
library(knitr)
library(cowplot)
library(viridis)
library(RColorBrewer)
library(rstatix)
library(ggsignif)
library(Hmisc)
library(kableExtra)
#source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
source("~/Downloads/geom_flat_violin.R")
library(readr)
library(stringr)
library(ggpubr)
library(infotheo)

These analyses were conducted in the following computing environment:

print(version)
##                _                           
## platform       x86_64-pc-linux-gnu         
## arch           x86_64                      
## os             linux-gnu                   
## system         x86_64, linux-gnu           
## status                                     
## major          4                           
## minor          0.4                         
## year           2021                        
## month          02                          
## day            15                          
## svn rev        80002                       
## language       R                           
## version.string R version 4.0.4 (2021-02-15)
## nickname       Lost Library Book
# Labeler for stats annotations
p_label <- function(p_value) {
  threshold = 0.0001
  if (p_value < threshold) {
    return(paste0("p < ", threshold))
  } else {
    return(paste0("p = ", p_value))
  }
}

# Significance threshold
alpha <- 0.05

####### misc #######
# Configure our default graphing theme
theme_set(theme_cowplot())
#data_loc <- "../high_n_timeseries.csv"
#data_loc <- "~/repos/GPTP-2021-exploration-diagnostic-phylodiversity/high_n_timeseries.csv"
data_loc <- "~/repos/ecology_in_EC_parameter_sweep/all_data.csv"

data <- read_csv(data_loc, na=c("NONE", "NA", ""))

data <- data %>% 
  filter(N==20, generation %%10 == 0) %>%
  mutate(
  selection_name = as.factor(case_when(
    SELECTION == 0 ~ "Tournament",
    SELECTION == 1 ~ "Fitness sharing",
    SELECTION == 2 ~ "Lexicase",
    SELECTION == 3 ~ "Eco-EA",
    SELECTION == 4 ~ "Random",
  )), 
  problem_name = as.factor(case_when(
    PROBLEM == 0 ~ "NK Landscape",
    PROBLEM == 1 ~ "Count Odds",
    PROBLEM == 2 ~ "Real-valued optimization",
    PROBLEM == 3 ~ "Sorting network",
    PROBLEM == 4 ~ "Logic-9"    
  ))
)

final_data <- filter(data, generation==max(data$generation))

Performance

Over time

ggplot(
    data,
    aes(
      x=generation,
      y=max_performance,
      color=selection_name,
      fill=selection_name
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_y_continuous(
    name="Average trait performance"
  ) +
  scale_x_continuous(
    name="Generation"
  ) +
  scale_color_discrete("Selection") + 
  scale_fill_discrete("Selection") +
  facet_wrap(~problem_name, scales="free")

Final

# Compute manual labels for geom_signif
stat.test <- final_data %>%
  wilcox_test(max_performance ~ selection_name) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x="selection_name",step.increase=1)
#stat.test$manual_position <- stat.test$y.position * .5
#stat.test$manual_position <- c(110, 150, 170, 170, 130, 110)
stat.test$label <- mapply(p_label,stat.test$p.adj)
ggplot(
    final_data,
    aes(
      x=selection_name,
      y=max_performance,
      fill=selection_name
    )
  ) +
  geom_boxplot() +
  scale_y_continuous(
    name="Average trait performance"
  ) +
  scale_x_discrete(
    name="Selection"
  ) +
  scale_fill_discrete(
    name="Selection"
  ) +
  scale_color_discrete(
    name="Selection"
  ) + 
  theme(legend.position="none") +
  facet_wrap(~problem_name, scales="free")

stat.test %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c(
      "striped",
      "hover",
      "condensed",
      "responsive"
    )
  ) %>%
  scroll_box(width="600px")
.y. group1 group2 n1 n2 statistic p p.adj p.adj.signif y.position groups xmin xmax label
max_performance Eco-EA Fitness sharing 240 240 29310.5 7.37e-01 1.000000 ns 125237.4 Eco-EA , Fitness sharing 1 2 p = 1
max_performance Eco-EA Lexicase 240 240 23075.0 1.46e-04 0.001460 ** 178661.5 Eco-EA , Lexicase 1 3 p = 0.00146
max_performance Eco-EA Random 240 240 39696.5 0.00e+00 0.000000 **** 232085.6 Eco-EA, Random 1 4 p < 1e-04
max_performance Eco-EA Tournament 240 240 29346.0 7.18e-01 1.000000 ns 285509.7 Eco-EA , Tournament 1 5 p = 1
max_performance Fitness sharing Lexicase 240 240 22328.5 2.01e-05 0.000201 *** 338933.8 Fitness sharing, Lexicase 2 3 p = 0.000201
max_performance Fitness sharing Random 240 240 41293.0 0.00e+00 0.000000 **** 392358.0 Fitness sharing, Random 2 4 p < 1e-04
max_performance Fitness sharing Tournament 240 240 27500.5 3.92e-01 1.000000 ns 445782.1 Fitness sharing, Tournament 2 5 p = 1
max_performance Lexicase Random 240 240 44132.5 0.00e+00 0.000000 **** 499206.2 Lexicase, Random 3 4 p < 1e-04
max_performance Lexicase Tournament 240 240 34831.5 6.61e-05 0.000661 *** 552630.3 Lexicase , Tournament 3 5 p = 0.000661
max_performance Random Tournament 240 240 18254.0 0.00e+00 0.000000 **** 606054.4 Random , Tournament 4 5 p < 1e-04

Phylogenetic diversity

Over time

ggplot(
    data,
    aes(
      x=generation,
      y=mean_phenotype_pairwise_distance,
      color=selection_name,
      fill=selection_name
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_y_log10(
    name="Mean pairwise distance"
  ) +
  scale_x_continuous(
    name="Generation"
  ) +
  scale_color_discrete("Selection") +
  scale_fill_discrete("Selection") +
  facet_wrap(~problem_name, scales = "free")

Final

# Compute manual labels for geom_signif
stat.test <- final_data %>%
  wilcox_test(mean_phenotype_pairwise_distance ~ selection_name) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x="selection_name",step.increase=1)
#stat.test$manual_position <- stat.test$y.position * .5
#stat.test$manual_position <- c(110, 150, 170, 170, 130, 110)
stat.test$label <- mapply(p_label,stat.test$p.adj)
ggplot(
    final_data,
    aes(
      x=selection_name,
      y=mean_phenotype_pairwise_distance,
      fill=selection_name
    )
  ) +
  geom_boxplot() +
  scale_y_log10(
    name="Mean pairwise distance"
  ) +
  scale_x_discrete(
    name="Selection"
  ) +
  scale_fill_discrete(
    name="Selection"
  ) +
  scale_color_discrete(
    name="Selection"
  ) + 
  theme(legend.position = "none") +
    facet_wrap(~problem_name, scales = "free")

stat.test %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c(
      "striped",
      "hover",
      "condensed",
      "responsive"
    )
  ) %>%
  scroll_box(width="600px")
.y. group1 group2 n1 n2 statistic p p.adj p.adj.signif y.position groups xmin xmax label
mean_phenotype_pairwise_distance Eco-EA Fitness sharing 240 240 2831.0 0.00e+00 0.000000 **** 9718.758 Eco-EA , Fitness sharing 1 2 p < 1e-04
mean_phenotype_pairwise_distance Eco-EA Lexicase 240 240 25159.0 1.70e-02 0.170000 ns 15104.178 Eco-EA , Lexicase 1 3 p = 0.17
mean_phenotype_pairwise_distance Eco-EA Random 240 240 9552.0 0.00e+00 0.000000 **** 20489.598 Eco-EA, Random 1 4 p < 1e-04
mean_phenotype_pairwise_distance Eco-EA Tournament 240 240 43052.5 0.00e+00 0.000000 **** 25875.018 Eco-EA , Tournament 1 5 p < 1e-04
mean_phenotype_pairwise_distance Fitness sharing Lexicase 240 240 33683.0 1.00e-03 0.010000 ** 31260.438 Fitness sharing, Lexicase 2 3 p = 0.01
mean_phenotype_pairwise_distance Fitness sharing Random 240 240 40738.0 0.00e+00 0.000000 **** 36645.858 Fitness sharing, Random 2 4 p < 1e-04
mean_phenotype_pairwise_distance Fitness sharing Tournament 240 240 57314.0 0.00e+00 0.000000 **** 42031.278 Fitness sharing, Tournament 2 5 p < 1e-04
mean_phenotype_pairwise_distance Lexicase Random 240 240 28272.0 8.49e-01 1.000000 ns 47416.698 Lexicase, Random 3 4 p = 1
mean_phenotype_pairwise_distance Lexicase Tournament 240 240 34801.0 7.84e-05 0.000784 *** 52802.118 Lexicase , Tournament 3 5 p = 0.000784
mean_phenotype_pairwise_distance Random Tournament 240 240 54927.0 0.00e+00 0.000000 **** 58187.538 Random , Tournament 4 5 p < 1e-04

Phenotypic diversity

Over time

ggplot(
    data,
    aes(
      x=generation,
      y=phenotype_num_taxa,
      color=selection_name,
      fill=selection_name
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_y_continuous(
    name="Phenotypic richness"
  ) +
  scale_x_continuous(
    name="Generation"
  ) +
  scale_color_discrete("Selection") + 
  scale_fill_discrete("Selection") +
  facet_wrap(~problem_name, scales = "free")

Final

# Compute manual labels for geom_signif
stat.test <- final_data %>%
  wilcox_test(phenotype_num_taxa ~ selection_name) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x="selection_name",step.increase=1)
#stat.test$manual_position <- stat.test$y.position * .5
#stat.test$manual_position <- c(110, 150, 170, 170, 130, 110)
stat.test$label <- mapply(p_label,stat.test$p.adj)
ggplot(
    final_data,
    aes(
      x=selection_name,
      y=phenotype_num_taxa,
      fill=selection_name
    )
  ) +
  geom_boxplot() +
  scale_y_continuous(
    name="Phenotypic Richness"
  ) +
  scale_x_discrete(
    name="Selection"
  ) +
  scale_fill_discrete(
    name="Selection"
  ) +
  scale_color_discrete(
    name="Selection"
  ) +
  theme(legend.position = "none") +
  facet_wrap(~problem_name, scales = "free")

stat.test %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = c(
      "striped",
      "hover",
      "condensed",
      "responsive"
    )
  ) %>%
  scroll_box(width="600px")
.y. group1 group2 n1 n2 statistic p p.adj p.adj.signif y.position groups xmin xmax label
phenotype_num_taxa Eco-EA Fitness sharing 240 240 2406.0 0.000000 0.00000 **** 1507.000 Eco-EA , Fitness sharing 1 2 p < 1e-04
phenotype_num_taxa Eco-EA Lexicase 240 240 14833.5 0.000000 0.00000 **** 2070.333 Eco-EA , Lexicase 1 3 p < 1e-04
phenotype_num_taxa Eco-EA Random 240 240 34261.5 0.000326 0.00326 ** 2633.667 Eco-EA, Random 1 4 p = 0.00326
phenotype_num_taxa Eco-EA Tournament 240 240 29587.5 0.604000 1.00000 ns 3197.000 Eco-EA , Tournament 1 5 p = 1
phenotype_num_taxa Fitness sharing Lexicase 240 240 42195.5 0.000000 0.00000 **** 3760.333 Fitness sharing, Lexicase 2 3 p < 1e-04
phenotype_num_taxa Fitness sharing Random 240 240 57231.0 0.000000 0.00000 **** 4323.667 Fitness sharing, Random 2 4 p < 1e-04
phenotype_num_taxa Fitness sharing Tournament 240 240 51342.0 0.000000 0.00000 **** 4887.000 Fitness sharing, Tournament 2 5 p < 1e-04
phenotype_num_taxa Lexicase Random 240 240 47218.0 0.000000 0.00000 **** 5450.333 Lexicase, Random 3 4 p < 1e-04
phenotype_num_taxa Lexicase Tournament 240 240 43850.0 0.000000 0.00000 **** 6013.667 Lexicase , Tournament 3 5 p < 1e-04
phenotype_num_taxa Random Tournament 240 240 25692.5 0.041000 0.41000 ns 6577.000 Random , Tournament 4 5 p = 0.41

Relationship between phenotypic and phylogenetic diversity

ggplot(
    final_data,
    aes(
        y=phenotype_num_taxa,
        x=mean_phenotype_pairwise_distance,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Phenotypic richness"
  ) +
  scale_x_continuous(
        name="Mean pairwise distance"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")

Relationship between diversity and success

Earlier in run

ggplot(
    data %>% filter(generation==500),
    aes(
        y=max_performance,
        x=mean_phenotype_pairwise_distance,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Average trait performance"
  ) +
  scale_x_continuous(
        name="Mean pairwise distance"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")

ggplot(
    data %>% filter(generation==500),
    aes(
        y=max_performance,
        x=phenotype_num_taxa,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Average trait performance"
  ) +
  scale_x_continuous(
        name="Phenotypic richness"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")

phylogney_vs_performance <- ggplot(
    data %>% filter(generation==1000),
    aes(
        y=max_performance,
        x=mean_phenotype_pairwise_distance,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Average trait performance"
  ) +
  scale_x_continuous(
        name="Mean pairwise distance"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")
  
phylogney_vs_performance

ggplot(
    data %>% filter(generation==1000),
    aes(
        y=max_performance,
        x=phenotype_num_taxa,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Average trait performance"
  ) +
  scale_x_continuous(
        name="Phenotypic richness"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")

End of run

ggplot(
    final_data,
    aes(
        y=max_performance,
        x=mean_phenotype_pairwise_distance,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Average trait performance"
  ) +
  scale_x_continuous(
        name="Mean pairwise distance"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")

ggplot(
    final_data,
    aes(
        y=max_performance,
        x=phenotype_num_taxa,
        color=selection_name,
        fill=selection_name
    )
  ) +
  geom_point() +
    scale_y_continuous(
        name="Average trait performance"
  ) +
  scale_x_continuous(
        name="Phenotypic richness"
  ) + 
  facet_wrap(
      ~selection_name*problem_name, scales="free"
  ) + 
  stat_smooth(
    method="lm"
  ) + 
  stat_cor(
    method="spearman", cor.coef.name = "rho", color="black"
  ) +
  theme(legend.position = "none")

Causality analysis

res <- data %>% group_by(SEED, selection_name, problem_name) %>%
summarise(
  fit_phylo_10 = condinformation(discretize(max_performance), discretize(lag(max_phenotype_pairwise_distance, 1)), discretize(lag(max_performance, 1))),
  fit_phylo_100 = condinformation(discretize(max_performance), discretize(lag(max_phenotype_pairwise_distance, 10)), discretize(lag(max_performance, 10))),
  fit_phylo_1000 = condinformation(discretize(max_performance), discretize(lag(max_phenotype_pairwise_distance, 100)), discretize(lag(max_performance, 100))),
    fit_pheno_10 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 1)), discretize(lag(max_performance, 1))),
      fit_pheno_100 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 10)), discretize(lag(max_performance, 10))),
      fit_pheno_1000 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 100)), discretize(lag(max_performance, 100)))
      )

res <- res %>% pivot_longer(cols=contains("o_10"))
res$offset <- str_extract(res$name, "[:digit:]*$")
res$Type <- case_when(str_detect(res$name, "phylo") ~ "Phylogenetic", TRUE ~ "Phenotypic")

ggplot(
  res %>% filter(str_detect(name, "fit_ph*")), 
  aes(
    x=as.factor(offset), 
    y=value, 
    color=Type
    )
  ) + 
  geom_boxplot() + 
  facet_wrap(~problem_name*selection_name) + 
  scale_x_discrete("Lag") + 
  scale_y_continuous("Transfer Entropy") + 
  scale_color_discrete("") + 
  theme(legend.position = "bottom")

res <- data %>% group_by(SEED, selection_name, problem_name) %>%
summarise(
  fit_phylo_10 = condinformation(discretize(max_performance), discretize(lag(mean_phenotype_evolutionary_distinctiveness, 1)), discretize(lag(max_performance, 1))),
  fit_phylo_100 = condinformation(discretize(max_performance), discretize(lag(mean_phenotype_evolutionary_distinctiveness, 10)), discretize(lag(max_performance, 10))),
  fit_phylo_1000 = condinformation(discretize(max_performance), discretize(lag(mean_phenotype_evolutionary_distinctiveness, 100)), discretize(lag(max_performance, 100))),
    fit_pheno_10 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 1)), discretize(lag(max_performance, 1))),
      fit_pheno_100 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 10)), discretize(lag(max_performance, 10))),
      fit_pheno_1000 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 100)), discretize(lag(max_performance, 100)))
      )

res <- res %>% pivot_longer(cols=contains("o_10"))
res$offset <- str_extract(res$name, "[:digit:]*$")
res$Type <- case_when(str_detect(res$name, "phylo") ~ "Phylogenetic", TRUE ~ "Phenotypic")

ggplot(
  res %>% filter(str_detect(name, "fit_ph*")), 
  aes(
    x=as.factor(offset), 
    y=value, 
    color=Type
    )
  ) + 
  geom_boxplot() + 
  facet_wrap(~problem_name*selection_name) + 
  scale_x_discrete("Lag") + 
  scale_y_continuous("Transfer Entropy") + 
  scale_color_discrete("") + 
  theme(legend.position = "bottom")

res <- data %>% group_by(SEED, selection_name, problem_name) %>%
summarise(
  fit_phylo_10 = condinformation(discretize(max_performance), discretize(lag(variance_phenotype_evolutionary_distinctiveness, 1)), discretize(lag(max_performance, 1))),
  fit_phylo_100 = condinformation(discretize(max_performance), discretize(lag(variance_phenotype_evolutionary_distinctiveness, 10)), discretize(lag(max_performance, 10))),
  fit_phylo_1000 = condinformation(discretize(max_performance), discretize(lag(variance_phenotype_evolutionary_distinctiveness, 100)), discretize(lag(max_performance, 100))),
    fit_pheno_10 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 1)), discretize(lag(max_performance, 1))),
      fit_pheno_100 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 10)), discretize(lag(max_performance, 10))),
      fit_pheno_1000 = condinformation(discretize(max_performance), discretize(lag(phenotype_num_taxa, 100)), discretize(lag(max_performance, 100)))
      )

res <- res %>% pivot_longer(cols=contains("o_10"))
res$offset <- str_extract(res$name, "[:digit:]*$")
res$Type <- case_when(str_detect(res$name, "phylo") ~ "Phylogenetic", TRUE ~ "Phenotypic")

ggplot(
  res %>% filter(str_detect(name, "fit_ph*")), 
  aes(
    x=as.factor(offset), 
    y=value, 
    color=Type
    )
  ) + 
  geom_boxplot() + 
  facet_wrap(~problem_name*selection_name) + 
  scale_x_discrete("Lag") + 
  scale_y_continuous("Transfer Entropy") + 
  scale_color_discrete("") + 
  theme(legend.position = "bottom")

res <- data %>% group_by(SEED, selection_name, problem_name) %>%
summarise(
  phen_phylo_10 = condinformation(discretize(phenotype_num_taxa), discretize(lag(max_phenotype_pairwise_distance, 1)), discretize(lag(phenotype_num_taxa, 1))),
  phen_phylo_100 = condinformation(discretize(phenotype_num_taxa), discretize(lag(max_phenotype_pairwise_distance, 10)), discretize(lag(phenotype_num_taxa, 10))),
  pheno_phylo_1000 = condinformation(discretize(phenotype_num_taxa), discretize(lag(max_phenotype_pairwise_distance, 100)), discretize(lag(phenotype_num_taxa, 100))),
    phylo_pheno_10 = condinformation(discretize(max_phenotype_pairwise_distance), discretize(lag(phenotype_num_taxa, 1)), discretize(lag(max_phenotype_pairwise_distance, 1))),
      phylo_pheno_100 = condinformation(discretize(max_phenotype_pairwise_distance), discretize(lag(phenotype_num_taxa, 10)), discretize(lag(max_phenotype_pairwise_distance, 10))),
      phylo_pheno_1000 = condinformation(discretize(max_phenotype_pairwise_distance), discretize(lag(phenotype_num_taxa, 100)), discretize(lag(max_phenotype_pairwise_distance, 100)))
      )

res <- res %>% pivot_longer(cols=contains("phylo"))
res$offset <- str_extract(res$name, "[:digit:]*$")
res$Type <- case_when(str_detect(res$name, "phylo_pheno") ~ "Phenotypic -> Phylogenetic", TRUE ~ "Phylogenetic -> Phenotypic")

ggplot(
  res, 
  aes(
    x=as.factor(offset), 
    y=value, 
    color=Type
    )
  ) + 
  geom_boxplot() + 
  facet_wrap(~problem_name*selection_name) + 
  scale_x_discrete("Lag") + 
  scale_y_continuous("Transfer Entropy") + 
  scale_color_discrete("") + 
  theme(legend.position = "bottom")

res <- data %>% group_by(SEED, selection_name, problem_name) %>%
summarise(
  phen_phylo_10 = condinformation(discretize(phenotype_num_taxa), discretize(lag(max_phenotype_evolutionary_distinctiveness, 1)), discretize(lag(phenotype_num_taxa, 1))),
  phen_phylo_100 = condinformation(discretize(phenotype_num_taxa), discretize(lag(max_phenotype_evolutionary_distinctiveness, 10)), discretize(lag(phenotype_num_taxa, 10))),
  pheno_phylo_1000 = condinformation(discretize(phenotype_num_taxa), discretize(lag(max_phenotype_evolutionary_distinctiveness, 100)), discretize(lag(phenotype_num_taxa, 100))),
    phylo_pheno_10 = condinformation(discretize(max_phenotype_evolutionary_distinctiveness), discretize(lag(phenotype_num_taxa, 1)), discretize(lag(max_phenotype_evolutionary_distinctiveness, 1))),
      phylo_pheno_100 = condinformation(discretize(max_phenotype_evolutionary_distinctiveness), discretize(lag(phenotype_num_taxa, 10)), discretize(lag(max_phenotype_evolutionary_distinctiveness, 10))),
      phylo_pheno_1000 = condinformation(discretize(max_phenotype_evolutionary_distinctiveness), discretize(lag(phenotype_num_taxa, 100)), discretize(lag(max_phenotype_evolutionary_distinctiveness, 100)))
      )

res <- res %>% pivot_longer(cols=contains("phylo"))
res$offset <- str_extract(res$name, "[:digit:]*$")
res$Type <- case_when(str_detect(res$name, "phylo_pheno") ~ "Phenotypic -> Phylogenetic", TRUE ~ "Phylogenetic -> Phenotypic")

ggplot(
  res, 
  aes(
    x=as.factor(offset), 
    y=value, 
    color=Type
    )
  ) + 
  geom_boxplot() + 
  facet_wrap(~problem_name*selection_name) + 
  scale_x_discrete("Lag") + 
  scale_y_continuous("Transfer Entropy") + 
  scale_color_discrete("") + 
  theme(legend.position = "bottom")

Manuscript figures

# legend <- cowplot::get_legend(
#     elite_ave_performance_fig +
#       guides(
#         color=guide_legend(nrow=1),
#         fill=guide_legend(nrow=1)
#       ) +
#       theme(
#         legend.position = "bottom",
#         legend.box="horizontal",
#         legend.justification="center"
#       )
#   )
# 
# grid <- plot_grid(
#   elite_ave_performance_fig +
#     ggtitle("Performance over time") +
#     labs(subtitle="") +
#     theme(legend.position="none"),
#   elite_final_performance_fig +
#     ggtitle("Final performance") +
#     theme(),
#   unique_start_position_coverage_fig +
#     ggtitle("Activation position coverage over time") +
#     labs(subtitle="") +
#     theme(legend.position="none"),
#   unique_start_positions_coverage_final_fig +
#     ggtitle("Final activation position coverage") +
#     theme(),
#   nrow=2,
#   ncol=2,
#   rel_widths=c(3,2),
#   labels="auto"
# )
# 
# grid <- plot_grid(
#   grid,
#   legend,
#   nrow=2,
#   ncol=1,
#   rel_heights=c(1, 0.1)
# )
# 
# save_plot(
#   "tournament-vs-lexicase-panel.pdf",
#   grid,
#   base_width=12,
#   base_height=8
# )
# 
# grid